import os
from shapely.geometry import Polygon, box
import math
import numpy as np
import pandas
import geopandas
import seaborn
import contextily
import matplotlib.pyplot as plt
from pointpats import centrography
from matplotlib.patches import Ellipse
from matplotlib import colors
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
from matplotlib import gridspec
import warnings
import subprocess
warnings.filterwarnings("ignore")
db = pandas.read_csv('Daten/Straftaten.csv')
db = geopandas.GeoDataFrame(db, geometry=geopandas.points_from_xy(db.long, db.lat), crs="epsg:4326")
db = db.to_crs(epsg=3857)
db['long'] = db.geometry.apply(lambda x: x.x)
db['lat'] = db.geometry.apply(lambda x: x.y)
wvz_polygon = pandas.read_csv('Daten/WVZ_Eckpunkte.csv')
wvz_polygon = geopandas.GeoDataFrame(wvz_polygon, geometry=geopandas.points_from_xy(wvz_polygon.lon, wvz_polygon.lat), crs="epsg:4326")
wvz_polygon = wvz_polygon.to_crs(epsg=3857)
wvz_polygon['lon'] = wvz_polygon.geometry.apply(lambda x: x.x)
wvz_polygon['lat'] = wvz_polygon.geometry.apply(lambda x: x.y)
wvz_polygon = wvz_polygon[['lon', 'lat']].to_numpy()
def plot_geodensity(data,
ax=None,
show_legend=True,
title=None,
xlim=(1379139.5582129187, 1383323.7984075241),
ylim=(6681082.251136509, 6683512.324276414)):
if ax is None:
ax = plt.gca()
mean_center = centrography.mean_center(data[['long', 'lat']])
med_center = centrography.euclidean_median(data[['long', 'lat']])
major, minor, rotation = centrography.ellipse(db[['long', 'lat']])
seaborn.kdeplot(
data['long'],
data['lat'],
n_levels=250,
shade=True,
shade_lowest=False,
cbar=False,
cbar_kws={"label": 'Crime Density', "pad": 0.02},
alpha=0.65,
cmap='magma_r')
contextily.add_basemap(
ax,
source=contextily.providers.Stamen.TonerLite)
ellipse = Ellipse(xy=mean_center, # center the ellipse on our mean center
width=major * 2, # centrography.ellipse only gives half the axis
height=minor * 2,
angle=np.rad2deg(rotation), # Angles for this are in degrees, not radians
facecolor='none',
edgecolor='red',
linestyle='--',
linewidth=2,
label='Standard Ellipse')
ax.add_patch(ellipse)
ax.add_patch(
plt.Polygon(
wvz_polygon,
closed=True,
edgecolor='darkred',
facecolor='none',
linestyle='-',
linewidth=2.5,
label='Weapon Prohibition Zone'
)
)
ax.scatter(
*mean_center, color='red', marker='x', s=50, label='Mean Center'
)
ax.scatter(
*med_center, color='darkgreen', marker='o', s=50, label='Median Center'
)
if show_legend:
ax.legend(prop=dict(size=18, weight='semibold'), loc='lower right')
ax.set_axis_off()
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_title(title, fontsize=20, loc="left", pad=5)
return ax
#plt.savefig("Figures/" + filename + ".pdf", bbox_inches='tight')
def plot_hotspot(data,
column,
ax=None,
show_legend=True,
title=None,
xlim=(1379139.5582129187, 1383323.7984075241),
ylim=(6681082.251136509, 6683512.324276414)):
if ax is None:
ax = plt.gca()
hmap = colors.ListedColormap(['white', 'red', 'blue', 'pink', 'lightblue'])
data.plot(ax=ax,
column=column,
edgecolor='dimgray',
cmap=hmap,
categorical=True,
alpha=0.65,
legend=show_legend)
contextily.add_basemap(
ax,
source=contextily.providers.Stamen.TonerLite)
ax.add_patch(
plt.Polygon(
wvz_polygon,
closed=True,
edgecolor='darkred',
facecolor='none',
linestyle='-',
linewidth=2.5,
label='Weapon-Prohibition Zone'
)
)
if show_legend:
legend_elements = [Line2D([0], [0], marker='o', color='w', label='Not Significant',
markerfacecolor='white', markersize=15),
Line2D([0], [0], marker='o', color='w', label='High-High',
markerfacecolor='red', markersize=15),
Line2D([0], [0], marker='o', color='w', label='Low-High',
markerfacecolor='pink', markersize=15),
Line2D([0], [0], marker='o', color='w', label='Low-Low',
markerfacecolor='lightblue', markersize=15),
Line2D([0], [0], marker='o', color='w', label='High-Low',
markerfacecolor='blue', markersize=15),
Patch(facecolor='w', edgecolor='darkred', linewidth=2.5,
label='Weapon Prohibition Zone')]
ax.legend(handles=legend_elements, prop=dict(size=18, weight='semibold'), loc='lower right')
ax.set_axis_off()
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_title(title, fontsize=22, loc="left", pad=5)
return ax
def create_grid(feature, shape="hexagon", side_length=50):
'''Create a grid consisting of either rectangles or hexagons with a specified side length that covers the extent of input feature.'''
'''Source: https://pygis.io/docs/e_summarize_vector.html'''
# Slightly displace the minimum and maximum values of the feature extent by creating a buffer
# This decreases likelihood that a feature will fall directly on a cell boundary (in between two cells)
# Buffer is projection dependent (due to units)
feature = feature.buffer(20)
# Get extent of buffered input feature
min_x, min_y, max_x, max_y = feature.total_bounds
# Create empty list to hold individual cells that will make up the grid
cells_list = []
# Create grid of squares if specified
if shape in ["square", "rectangle", "box"]:
# Adapted from https://james-brennan.github.io/posts/fast_gridding_geopandas/
# Create and iterate through list of x values that will define column positions with specified side length
for x in np.arange(min_x - side_length, max_x + side_length, side_length):
# Create and iterate through list of y values that will define row positions with specified side length
for y in np.arange(min_y - side_length, max_y + side_length, side_length):
# Create a box with specified side length and append to list
cells_list.append(box(x, y, x + side_length, y + side_length))
# Otherwise, create grid of hexagons
elif shape == "hexagon":
# Set horizontal displacement that will define column positions with specified side length (based on normal hexagon)
x_step = 1.5 * side_length
# Set vertical displacement that will define row positions with specified side length (based on normal hexagon)
# This is the distance between the centers of two hexagons stacked on top of each other (vertically)
y_step = math.sqrt(3) * side_length
# Get apothem (distance between center and midpoint of a side, based on normal hexagon)
apothem = (math.sqrt(3) * side_length / 2)
# Set column number
column_number = 0
# Create and iterate through list of x values that will define column positions with vertical displacement
for x in np.arange(min_x, max_x + x_step, x_step):
# Create and iterate through list of y values that will define column positions with horizontal displacement
for y in np.arange(min_y, max_y + y_step, y_step):
# Create hexagon with specified side length
hexagon = [[x + math.cos(math.radians(angle)) * side_length, y + math.sin(math.radians(angle)) * side_length] for angle in range(0, 360, 60)]
# Append hexagon to list
cells_list.append(Polygon(hexagon))
# Check if column number is even
if column_number % 2 == 0:
# If even, expand minimum and maximum y values by apothem value to vertically displace next row
# Expand values so as to not miss any features near the feature extent
min_y -= apothem
max_y += apothem
# Else, odd
else:
# Revert minimum and maximum y values back to original
min_y += apothem
max_y -= apothem
# Increase column number by 1
column_number += 1
# Else, raise error
else:
raise Exception("Specify a rectangle or hexagon as the grid shape.")
# Create grid from list of cells
grid = geopandas.GeoDataFrame(cells_list, columns=['geometry'], crs=3857)
# Create a column that assigns each grid a number
grid["Grid_ID"] = np.arange(len(grid))
# Return grid
return grid
def create_data_hotspots(data, grid_data, filename, folder):
data = data.rename_axis("ID").reset_index()
grid_cell = geopandas.sjoin(data, grid_data, how="inner", op="intersects")
grid_cell = grid_cell.drop_duplicates(subset=['ID']).reset_index(drop=True)
count_field = "Count"
grid_cell[count_field] = 1
grid_cell = grid_cell.groupby('Grid_ID').agg({count_field: 'sum'})
grid_data = grid_data.merge(grid_cell, on='Grid_ID', how="left")
grid_data[count_field] = grid_data[count_field].fillna(0)
grid_data["Count"] = grid_data["Count"].astype(int)
grid_data.to_file("Daten/" + folder + "/" + filename + ".shp")
def export_grid(data,
grid_data,
filename,
before=pandas.to_datetime(["2018-05-05", "2017-11-05", "2017-05-05", "2016-11-05"]),
after=pandas.to_datetime(["2019-05-05", "2019-11-05", "2020-05-05", "2020-11-05"]),
label=["_06_month", "_12_month", "_18_month", "_24_month"],
folder="01_Hotspots_raw"):
if len(before) != len(after):
ValueError('length of before should be equal to length after')
imp_wvz = pandas.to_datetime("2018-11-05")
for i in range(0, len(before)):
create_data_hotspots(data=data[(before[i] < data.date) & (data.date <= imp_wvz)],
grid_data=grid_data,
filename=filename + label[i] + "_before",
folder=folder)
create_data_hotspots(data=data[(imp_wvz < data.date) & (data.date <= after[i])],
grid_data=grid_data,
filename=filename + label[i] + "_after",
folder=folder)
db.date = pandas.to_datetime(db.date)
db["year"] = db.date.dt.year
db["wvz"] = db['date'].apply(lambda x: 'before wpz' if x <= pandas.to_datetime("2018-11-05") else 'after wpz')
# ### Total police-recorded crimes by year
db.groupby("year").pks_crime.count()
year 2016 5128 2017 4789 2018 5262 2019 5275 2020 4156 Name: pks_crime, dtype: int64
# ### Density of total police-recorded crimes before and after implementation of the WPZ
fig = plt.figure(figsize=(24, 24), constrained_layout=True)
fig.suptitle('Total police-recorded crimes',
fontsize=24,
fontweight='bold',
x=0.09,
horizontalalignment='left',
verticalalignment='top')
gs = gridspec.GridSpec(2, 1, figure=fig)
ax_1 = fig.add_subplot(gs[0, 0])
plot_geodensity(db[db.wvz == 'before wpz'],
title="Before implementation of WPZ")
ax_2 = fig.add_subplot(gs[1, 0])
plot_geodensity(db[db.wvz == 'after wpz'],
show_legend=False,
title="After implementation of WPZ")
<AxesSubplot:title={'left':'After implementation of WPZ'}, xlabel='long', ylabel='lat'>
crime_types = [x for x in db.pks_crime.unique() if str(x) != 'nan']
crime_types
['Sonstige Straftatbestände (StGB)', 'Sonstiger schwerer Diebstahl insgesamt §§ 243 - 244a StGB', 'Rohheitsdelikte und Straftaten gegen die persönliche Freiheit', 'Vermögens- und Fälschungsdelikte', 'Sonstiger einfacher Diebstahl §§ 242, 247, 248a-c StGB', 'Strafrechtliche Nebengesetze', 'Straftaten gegen das Leben', 'Straftaten gegen die sexuelle Selbstbestimmung insgesamt']
db_theft = db[db.pks_crime.isin(["Sonstiger einfacher Diebstahl §§ 242, 247, 248a-c StGB",
"Sonstiger schwerer Diebstahl insgesamt §§ 243 - 244a StGB"])]
db_violence = db[db.pks_crime.isin(['Straftaten gegen das Leben',
'Rohheitsdelikte und Straftaten gegen die persönliche Freiheit'])]
fig = plt.figure(figsize=(24, 24), constrained_layout=True)
fig.suptitle('Police-recorded violent crimes',
fontsize=24,
fontweight='bold',
x=0.09,
horizontalalignment='left',
verticalalignment='top')
gs = gridspec.GridSpec(2, 1, figure=fig)
ax_1 = fig.add_subplot(gs[0, 0])
plot_geodensity(db_violence[db_violence.wvz == 'before wpz'],
title="Before implementation of WPZ")
ax_2 = fig.add_subplot(gs[1, 0])
plot_geodensity(db_violence[db_violence.wvz == 'after wpz'],
show_legend=False,
title="After implementation of WPZ")
<AxesSubplot:title={'left':'After implementation of WPZ'}, xlabel='long', ylabel='lat'>
fig = plt.figure(figsize=(24, 24), constrained_layout=True)
fig.suptitle('Police-recorded petty and grand theft',
fontsize=24,
fontweight='bold',
x=0.09,
horizontalalignment='left',
verticalalignment='top')
gs = gridspec.GridSpec(2, 1, figure=fig)
ax_1 = fig.add_subplot(gs[0, 0])
plot_geodensity(db_theft[db_theft.wvz == 'before wpz'],
title="Before implementation of WPZ")
ax_2 = fig.add_subplot(gs[1, 0])
plot_geodensity(db_theft[db_theft.wvz == 'after wpz'],
show_legend=False,
title="After implementation of WPZ")
<AxesSubplot:title={'left':'After implementation of WPZ'}, xlabel='long', ylabel='lat'>
grid_data = create_grid(feature=db,
shape="hexagon",
side_length=25)
grid_data["wvz"] = [geopandas.GeoSeries(Polygon(wvz_polygon)).contains(x).item() for x in grid_data.geometry]
export_data = [db, db_theft, db_violence]
export_label = ["Total", "Theft", "Violence"]
for i in range(0, 3):
export_grid(data=export_data[i],
grid_data=grid_data,
filename=export_label[i])
subprocess.call(["C:/Program Files/R/R-4.2.2/bin/Rscript",
"E:/Documents/Publikationen/Work-in-Progress/Anschlussforschung WZV/Datenanalyse/Skripte/02_Export_Hotspot_Analysis.R"],
shell=True)
0
hotspots_data_names = [x for x in os.listdir("Daten/02_Hotspots_final") if ".shp" in x]
hotspot_data = [geopandas.read_file("Daten/02_Hotspots_final/" + x) for x in hotspots_data_names]
fig = plt.figure(figsize=(40, 24), constrained_layout=True)
fig.suptitle('Cluster Map of total police-recored crimes',
fontsize=24,
fontweight='bold',
x=0.003,
horizontalalignment='left',
verticalalignment='top')
gs = gridspec.GridSpec(2, 2, figure=fig)
ax_1 = fig.add_subplot(gs[0, 0])
plot_hotspot(hotspot_data[4], show_legend=True, column="c_vals", title="6-month comparison")
ax_2 = fig.add_subplot(gs[0, 1])
plot_hotspot(hotspot_data[5], show_legend=False, column="c_vals", title="12-month comparison")
ax_3 = fig.add_subplot(gs[1, 0])
plot_hotspot(hotspot_data[6], show_legend=False, column="c_vals", title="18-month comparison")
ax_4 = fig.add_subplot(gs[1, 1])
plot_hotspot(hotspot_data[7], show_legend=False, column="c_vals", title="24-month comparison")
<AxesSubplot:title={'left':'24-month comparison'}>
fig = plt.figure(figsize=(40, 24), constrained_layout=True)
fig.suptitle('Cluster Map of Petty and Grand theft',
fontsize=24,
fontweight='bold',
x=0.003,
horizontalalignment='left',
verticalalignment='top')
gs = gridspec.GridSpec(2, 2, figure=fig)
ax_1 = fig.add_subplot(gs[0, 0])
plot_hotspot(hotspot_data[0], show_legend=True, column="c_vals", title="6-month comparison")
ax_2 = fig.add_subplot(gs[0, 1])
plot_hotspot(hotspot_data[1], show_legend=False, column="c_vals", title="12-month comparison")
ax_3 = fig.add_subplot(gs[1, 0])
plot_hotspot(hotspot_data[2], show_legend=False, column="c_vals", title="18-month comparison")
ax_4 = fig.add_subplot(gs[1, 1])
plot_hotspot(hotspot_data[3], show_legend=False, column="c_vals", title="24-month comparison")
<AxesSubplot:title={'left':'24-month comparison'}>
fig = plt.figure(figsize=(40, 24), constrained_layout=True)
fig.suptitle('Cluster Map of Violence',
fontsize=24,
fontweight='bold',
x=0.003,
horizontalalignment='left',
verticalalignment='top')
gs = gridspec.GridSpec(2, 2, figure=fig)
ax_1 = fig.add_subplot(gs[0, 0])
plot_hotspot(hotspot_data[8], show_legend=True, column="c_vals", title="6-month comparison")
ax_2 = fig.add_subplot(gs[0, 1])
plot_hotspot(hotspot_data[9], show_legend=False, column="c_vals", title="12-month comparison")
ax_3 = fig.add_subplot(gs[1, 0])
plot_hotspot(hotspot_data[10], show_legend=False, column="c_vals", title="18-month comparison")
ax_4 = fig.add_subplot(gs[1, 1])
plot_hotspot(hotspot_data[11], show_legend=False, column="c_vals", title="24-month comparison")
<AxesSubplot:title={'left':'24-month comparison'}>